------------------------------------------------------------------------------------------------------------------------------------------
      name:  <unnamed>
       log:  E:\REStat_MS14767_Vol96(2)\Data preparation Compustat segment\8_links.log
  log type:  text
 opened on:  19 Dec 2014, 18:42:42

. *-----------------------------------------------------------------
. 
. cap set mem 1000m

. cap set mat 2000

. 
. 
. use "raw_rjv_panel_america.dta", clear

. 
. *************
. * here we retrieve some relevant information (SIC4, market shares, HHI) of the firms from Compustat
. *************
. 
. so ticker year

. merge m:1 ticker year using "segment_wide.dta"

    Result                           # of obs.
    -----------------------------------------
    not matched                       139,003
        from master                    25,881  (_merge==1)
        from using                    113,122  (_merge==2)

    matched                            47,129  (_merge==3)
    -----------------------------------------

. tab _merge

                 _merge |      Freq.     Percent        Cum.
------------------------+-----------------------------------
        master only (1) |     25,881       13.90       13.90
         using only (2) |    113,122       60.78       74.68
            matched (3) |     47,129       25.32      100.00
------------------------+-----------------------------------
                  Total |    186,132      100.00

. 
. ****************************************************
. 
. 
. *************
. * drop firms in Compustat that are not in the RJV database
. * drop firms in the database for which we don't have MS information
. *************
. 
. drop if _merge==2
(113122 observations deleted)

. drop if _merge==1
(25881 observations deleted)

. drop _merge

. 
. 
. **************
. * we drop all observations where the firm was not yet an insider
. **************
. 
. drop if ins==0
(24647 observations deleted)

. 
. **********************
. * we keep only relevant variables
. ************************
. 
. keep rjvname entryname entityname ticker year SIC* sic4 MS* comnum rjvnum

. 
. 
. egen comnum2=group(entityname)

. drop comnum

. rename comnum2 comnum

. 
. egen rjvnum2=group(rjvname)

. drop rjvnum

. rename rjvnum2 rjvnum

. 
. 
. 
. *the maximum number of industries is 459
. sum year

    Variable |       Obs        Mean    Std. Dev.       Min        Max
-------------+--------------------------------------------------------
        year |     22482    1994.858    3.255954       1986       1999

. *the maximum number of years is 14
. 
. 
. 
. *********************************************************************************************************************
. *********************************************************************************************************************
. ****
. ****                     Here we count the links with firms from any industry
. ****
. *********************************************************************************************************************
. *********************************************************************************************************************
. 
. so year comnum rjvnum

. 
. egen seq=seq(), by(year)

. egen seq_SIC=group(year)

. egen S=max(seq), by(year)

. 
. * we generate empty vectors for the link measures, which will be generated in mata
. 
. gen measure1=.
(22482 missing values generated)

. gen measure2=.
(22482 missing values generated)

. 
. 
. sum S

    Variable |       Obs        Mean    Std. Dev.       Min        Max
-------------+--------------------------------------------------------
           S |     22482    2168.294     774.491        247       2934

. 
. local max_obs = r(max)

. local i=1

. forvalues i=1/`max_obs'{
  2. qui by year: gen x`i'=rjvnum if seq==`i'
  3. qui egen rjvnum`i'=max(x`i'), by(year)
  4. qui by year: gen f`i'=comnum if rjvnum`i'==rjvnum & rjvnum`i'!=x`i'
  5. drop x`i'
  6. drop rjvnum`i'
  7. }

. 
. 
. 
. **** here we go into mata
. 
. clear mata

. mata
------------------------------------------------- mata (type end to exit) ----------------------------------------------------------------
: 
: rjv = st_data(. ,("rjvnum"))

: comnum = st_data(. ,("comnum"))

: seq_SIC= st_data(. , ("seq_SIC")) 

: S= st_data(. , ("S")) 

: n_var =  st_nvar()

: 
: measure1_allfirm = 0

: measure2_allfirm = 0

: 
: 
: 
:         s = n_var - max(S)+1 /*position of the first Variable of the Matrix*/

:         index=(s::n_var)'

:         st_view(f=.,.,index)

:         f=f

: 
: max_SIC = max(seq_SIC)

: 
: 
: 
: 
: for(i=1;i<=max_SIC;i++) {
> 
> rowindex = mm_which(seq_SIC:==i)
> comnum_rel = comnum[rowindex]
> nr_rows = length(comnum_rel)
> uniquefirms = uniqrows(comnum_rel)
> nr_unique = length(uniquefirms)
>         measurevect1 = comnum_rel*0
>         measurevect2 = comnum_rel*0
> 
> if (nr_unique==1) {
> nr_entry1 = length(comnum_rel)
> measurevect1=mm_repeat(0,nr_entry1)
> measurevect2=mm_repeat(0,nr_entry1)
> 
> }
> 
> if (nr_unique>1) {
> 
> 
> for (j=1;j<=nr_unique;j++) {
>         firm_rel = uniquefirms[j]
>         firm_position = mm_which(comnum_rel:==firm_rel)
>         nr_firm_entry = length(firm_position)
> 
> 
>         measure = f[rowindex,firm_position]
>         /*
>         check = rowmax(measure)
>         measurevect = rowsum(measure)
>         check2 = check-measurevect
>         nonzero = mm_which(check2:<0)
>         alert = length(nonzero)
>         */
>         
>         measurevect0 = rowsum(rowmax(measure))
>         notself = mm_which(measurevect0:!=firm_rel)                                     /*theoretically this step is not necessary,
>                                                                                                                                         
>                         it makes sure that links with itself are 
>                                                                                                                                         
>                         not counted. */                 
>         measurevect = measurevect0[notself]
>         nonzero_pos = mm_which(measurevect:>0)
>         
>         
>         measure1 = length(measurevect[nonzero_pos])
>         measure2 = length(uniqrows(measurevect[nonzero_pos]))
> 
> 
>         measurevect1[firm_position]=mm_repeat(measure1,nr_firm_entry)
>         measurevect2[firm_position]=mm_repeat(measure2,nr_firm_entry)
>         
>         
>         }
> } /* end if */
> 
>         measure1_allfirm = (measure1_allfirm \ measurevect1)
>         measure2_allfirm = (measure2_allfirm \ measurevect2)
> }
                 <istmt>:  3499  mm_which() not found
r(3499);

: 
: 
: n = length(measure1_allfirm)

: st_store(., ("measure1"), measure1_allfirm[2::n])
                 <istmt>:  3301  subscript invalid
r(3301);

: st_store(., ("measure2"), measure2_allfirm[2::n])
                 <istmt>:  3301  subscript invalid
r(3301);

: 
: 
: end
------------------------------------------------------------------------------------------------------------------------------------------

. 
. 
. rename measure1 links1_tot

. label var links1_tot "Total direct links with double counting-RJV"

. rename measure2 links2_tot

. label var links2_tot "Total direct links without double counting-RJV"

. 
. 
. 
. 
. 
. *********************************************************************************************************************
. *********************************************************************************************************************
. ****
. ****                     Here we count the links with firms from the same industry, i.e. direct competitors
. ****
. *********************************************************************************************************************
. *********************************************************************************************************************
. drop f*

. 
. reshape long SIC MS, i(rjvnum ticker year) j(seq_sic)
(note: j = 1 2 3 4 5 6 7 8 9 10)

Data                               wide   ->   long
-----------------------------------------------------------------------------
Number of obs.                    22482   ->  224820
Number of variables                  33   ->      16
j variable (10 values)                    ->   seq_sic
xij variables:
                    SIC1 SIC2 ... SIC10   ->   SIC
                       MS1 MS2 ... MS10   ->   MS
-----------------------------------------------------------------------------

. drop if SIC ==.
(164246 observations deleted)

. drop seq seq_SIC S

. so year SIC comnum rjvnum

. 
. egen seq=seq(), by(year SIC)

. egen seq_SIC=group(year SIC)

. egen S=max(seq), by(year SIC)

. 
. 
. sum S

    Variable |       Obs        Mean    Std. Dev.       Min        Max
-------------+--------------------------------------------------------
           S |     60574    86.86961    82.85097          1        394

. local max_obs = r(max)

. local i=1

. forvalues i=1/`max_obs' {
  2. qui by year SIC: gen x`i'=rjvnum if seq==`i'
  3. qui egen rjvnum`i'=max(x`i'), by(year SIC)
  4. qui by year SIC: gen f`i'=comnum if rjvnum`i'==rjvnum & rjvnum`i'!=x`i'
  5. drop x`i'
  6. drop rjvnum`i'
  7. }

. 
. 
. 
. 
. 
. 
.         ******************************************************************************
.         **** here we put the f Matrix into mata
. 
.                         mata
------------------------------------------------- mata (type end to exit) ----------------------------------------------------------------
: 
:                         rjv = st_data(. ,("rjvnum"))

:                         comnum = st_data(. ,("comnum"))

:                         seq_SIC= st_data(. , ("seq_SIC")) 

:                         S= st_data(. , ("S")) 

:                         n_var =  st_nvar()

: 
:                         s = n_var - max(S)+1 /*position of the first Variable of the Matrix*/

:                         index=(s::n_var)'

:                         st_view(f=.,.,index)

:                         f=f

: 
:                         end
------------------------------------------------------------------------------------------------------------------------------------------

.         ******************************************************************************
. drop f*

. gen measure1=.
(60574 missing values generated)

. gen measure2=.
(60574 missing values generated)

. gen measure3=.
(60574 missing values generated)

. gen measure4=. 
(60574 missing values generated)

. 
. sum S

    Variable |       Obs        Mean    Std. Dev.       Min        Max
-------------+--------------------------------------------------------
           S |     60574    86.86961    82.85097          1        394

. local max_obs = r(max)

. local i=1

. forvalues i=1/`max_obs' {
  2. qui by year SIC: gen x`i'=rjvnum if seq==`i'
  3. qui egen rjvnum`i'=max(x`i'), by(year SIC)
  4. qui by year SIC: gen ms`i'=MS if rjvnum`i'==rjvnum & rjvnum`i'!=x`i' 
  5. drop x`i'
  6. drop rjvnum`i'
  7. }

.                 
. 
.                 
.                 ********************************************************************************************
.                 ********************************************************************************************
.                 * Since the market shares don't necessarily uniquely identify each firm within a SIC code,
.                 * it is helpful to generate the number of links and the aggregated market share of those
.                 * firms, which form the links together.
.                 * In that way we can use the position of unique firms in the measure vector from the
.                 * comnum identifier to identify the unique market shares.
.                 ********************************************************************************************
.                 ********************************************************************************************
.                 
.                 
.                 **** here we put the ms Matrix into mata
. 
.         mata
------------------------------------------------- mata (type end to exit) ----------------------------------------------------------------
: 
:                 
: 
:                 n_var =  st_nvar()

:                 s = n_var - max(S)+1 /*position of the first Variable of the Matrix*/

:                 index=(s::n_var)'

:                 st_view(ms=.,.,index)

:                 ms=ms

: 
:         measure1_allfirm = 0

:         measure2_allfirm = 0

:         measure3_allfirm = 0

:         measure4_allfirm = 0

:         
:         max_SIC = max(seq_SIC)

: 
: 
:         for(i=1;i<=max_SIC;i++) {
> 
>         rowindex = mm_which(seq_SIC:==i)
>         comnum_rel = comnum[rowindex]
>         nr_rows = length(comnum_rel)
>         uniquefirms = uniqrows(comnum_rel)
>         nr_unique = length(uniquefirms)
>                 measurevect1 = comnum_rel*0
>                 measurevect2 = comnum_rel*0
>                 measurevect3 = comnum_rel*0
>                 measurevect4 = comnum_rel*0
> 
>         if (nr_unique==1) {
>         nr_entry1 = length(comnum_rel)
>         measurevect1=mm_repeat(0,nr_entry1)
>         measurevect2=mm_repeat(0,nr_entry1)
>         measurevect3=mm_repeat(0,nr_entry1)
>         measurevect4=mm_repeat(0,nr_entry1)
> 
>         }
> 
>         if (nr_unique>1) {
> 
> 
>         for (j=1;j<=nr_unique;j++) {
>                 firm_rel = uniquefirms[j]
>                 firm_position = mm_which(comnum_rel:==firm_rel)
>                 nr_firm_entry = length(firm_position)
> 
> 
>                 measure1 = f[rowindex,firm_position]
>                 measure2 = ms[rowindex,firm_position]
>                 
>                 /*
>                 check = rowmax(measure1)
>                 measurevect01 = rowsum(measure1)
>                 check2 = check-measurevect01
>                 nonzero = mm_which(check2:<0)
>                 alert = length(nonzero)
>                 */
>                 
>                 measurevect001 = rowsum(rowmax(measure1))
>                 measurevect002 = rowsum(rowmax(measure2))
>                 
>                 notself = mm_which(measurevect001:!=firm_rel)
>                 
>                 
>                 measurevect01 = measurevect001[notself]
>                 measurevect02 = measurevect002[notself]                                                         /*theoretically this ste
> p is not necessary,
>                                                                                                                                         
>                         it makes sure that links with itself are 
>                                                                                                                                         
>                         not counted. */         
>                 nonzero_pos = mm_which(measurevect01:>0)
>                 
>                 measuremat1 = (measurevect01[nonzero_pos],measurevect02[nonzero_pos])/*at this point, marketshare and firm information a
> re linked
>                                                                                                                                         
>                         such that we can identify the list of unique links using 
>                                                                                                                                         
>                         the firm information*/
>                 
>                 measuremat2 = uniqrows(measuremat1)             
>         
>         
>                 measure1 = rows(measuremat1)
>                 measure2 = rows(measuremat2)
>                 measure3 = colsum(measuremat1[.,2])
>                 measure4 = colsum(measuremat2[.,2])
>                 
>         measurevect1[firm_position]=mm_repeat(measure1,nr_firm_entry)
>         measurevect2[firm_position]=mm_repeat(measure2,nr_firm_entry)
>         measurevect3[firm_position]=mm_repeat(measure3,nr_firm_entry)
>         measurevect4[firm_position]=mm_repeat(measure4,nr_firm_entry)   
>                 
>                 }
>         } /* end if */
> 
>                 measure1_allfirm = (measure1_allfirm \ measurevect1)
>                 measure2_allfirm = (measure2_allfirm \ measurevect2)
>                 measure3_allfirm = (measure3_allfirm \ measurevect3)
>                 measure4_allfirm = (measure4_allfirm \ measurevect4)
>         }
                 <istmt>:  3499  mm_which() not found
r(3499);

: 
: 
:         n = length(measure1_allfirm)

:         st_store(., ("measure1"), measure1_allfirm[2::n])
                 <istmt>:  3301  subscript invalid
r(3301);

:         st_store(., ("measure2"), measure2_allfirm[2::n])
                 <istmt>:  3301  subscript invalid
r(3301);

:         st_store(., ("measure3"), measure3_allfirm[2::n])
                 <istmt>:  3301  subscript invalid
r(3301);

:         st_store(., ("measure4"), measure4_allfirm[2::n])
                 <istmt>:  3301  subscript invalid
r(3301);

: 
: 
: end
------------------------------------------------------------------------------------------------------------------------------------------

. 
. 
. rename measure1 links1

. label var links1 "Direct links with double counting"

. rename measure2 links2

. label var links2 "Direct links without double counting"

. rename measure3 links_MS1_tot

. label var links_MS1_tot "Market share of total direct links with double counting-RJV"

. rename measure4 links_MS2_tot

. label var links_MS2_tot "Market share of total direct links without double counting-RJV"

. 
. 
. 
. 
. 
. 
. 
. *********************************************************************************************************************
. *********************************************************************************************************************
. ****
. ****                     Here we count the links with firms from the same industry
. ****                     which are in a RJV whose SIC4 code is the same as that of the firm
. ****
. *********************************************************************************************************************
. *********************************************************************************************************************
. drop ms* 

. 
. so year SIC comnum rjvnum

. 
. sum S

    Variable |       Obs        Mean    Std. Dev.       Min        Max
-------------+--------------------------------------------------------
           S |     60574    86.86961    82.85097          1        394

. local max_obs = r(max)

. local i=1

. forvalues i=1/`max_obs' {
  2. qui by year SIC: gen x`i'=rjvnum if seq==`i'
  3. qui egen rjvnum`i'=max(x`i'), by(year SIC)
  4. qui by year SIC: gen f`i'=comnum if rjvnum`i'==rjvnum & rjvnum`i'!=x`i' & SIC==sic4
  5. drop x`i'
  6. drop rjvnum`i'
  7. }

. 
. 
. 
. ******************************************************************************
.         **** here we put the f Matrix into mata
. 
.                         mata
------------------------------------------------- mata (type end to exit) ----------------------------------------------------------------
: 
:                         rjv = st_data(. ,("rjvnum"))

:                         comnum = st_data(. ,("comnum"))

:                         seq_SIC= st_data(. , ("seq_SIC")) 

:                         S= st_data(. , ("S")) 

:                         n_var =  st_nvar()

: 
:                         s = n_var - max(S)+1 /*position of the first Variable of the Matrix*/

:                         index=(s::n_var)'

:                         st_view(f=.,.,index)

:                         f=f

: 
:                         end
------------------------------------------------------------------------------------------------------------------------------------------

.         ******************************************************************************
. drop f*

.         
. gen measure1=.
(60574 missing values generated)

. gen measure2=.
(60574 missing values generated)

. gen measure3=.
(60574 missing values generated)

. gen measure4=. 
(60574 missing values generated)

. 
. sum S

    Variable |       Obs        Mean    Std. Dev.       Min        Max
-------------+--------------------------------------------------------
           S |     60574    86.86961    82.85097          1        394

. local max_obs = r(max)

. local i=1

. forvalues i=1/`max_obs' {
  2. qui by year SIC: gen x`i'=rjvnum if seq==`i'
  3. qui egen rjvnum`i'=max(x`i'), by(year SIC)
  4. qui by year SIC: gen ms`i'=MS if rjvnum`i'==rjvnum & rjvnum`i'!=x`i' 
  5. drop x`i'
  6. drop rjvnum`i'
  7. }

.                 
. 
.                 
.                 ********************************************************************************************
.                 ********************************************************************************************
.                 * Since the market shares don't necessarily uniquely identify each firm within a SIC code,
.                 * it is helpful to generate the number of links and the aggregated market share of those
.                 * firms, which form the links together.
.                 * In that way we can use the position of unique firms in the measure vector from the
.                 * comnum identifier to identify the unique market shares.
.                 ********************************************************************************************
.                 ********************************************************************************************
.                 
.                 
.                 **** here we put the ms Matrix into mata
. 
.         mata
------------------------------------------------- mata (type end to exit) ----------------------------------------------------------------
: 
:                         n_var =  st_nvar()

: 
:                         s = n_var - max(S)+1 /*position of the first Variable of the Matrix*/

:                         index=(s::n_var)'

:                         st_view(ms=.,.,index)

:                         ms = ms

:                 
: 
:         measure1_allfirm = 0

:         measure2_allfirm = 0

:         measure3_allfirm = 0

:         measure4_allfirm = 0

:         
:         max_SIC = max(seq_SIC)

: 
: 
:         for(i=1;i<=max_SIC;i++) {
> 
>         rowindex = mm_which(seq_SIC:==i)
>         comnum_rel = comnum[rowindex]
>         nr_rows = length(comnum_rel)
>         uniquefirms = uniqrows(comnum_rel)
>         nr_unique = length(uniquefirms)
>                 measurevect1 = comnum_rel*0
>                 measurevect2 = comnum_rel*0
>                 measurevect3 = comnum_rel*0
>                 measurevect4 = comnum_rel*0
> 
>         if (nr_unique==1) {
>         nr_entry1 = length(comnum_rel)
>         measurevect1=mm_repeat(0,nr_entry1)
>         measurevect2=mm_repeat(0,nr_entry1)
>         measurevect3=mm_repeat(0,nr_entry1)
>         measurevect4=mm_repeat(0,nr_entry1)
> 
>         }
> 
>         if (nr_unique>1) {
> 
> 
>         for (j=1;j<=nr_unique;j++) {
>                 firm_rel = uniquefirms[j]
>                 firm_position = mm_which(comnum_rel:==firm_rel)
>                 nr_firm_entry = length(firm_position)
> 
> 
>                 measure1 = f[rowindex,firm_position]
>                 measure2 = ms[rowindex,firm_position]
>                 
>                 /*
>                 check = rowmax(measure1)
>                 measurevect01 = rowsum(measure1)
>                 check2 = check-measurevect01
>                 nonzero = mm_which(check2:<0)
>                 alert = length(nonzero)
>                 */
>                 
>                 measurevect001 = rowsum(rowmax(measure1))
>                 measurevect002 = rowsum(rowmax(measure2))
>                 
>                 notself = mm_which(measurevect001:!=firm_rel)
>                 
>                 
>                 measurevect01 = measurevect001[notself]                                                         
>                 measurevect02 = measurevect002[notself]                                                         /*theoretically this ste
> p is not necessary,
>                                                                                                                                         
>                         it makes sure that links with itself are 
>                                                                                                                                         
>                         not counted. */ 
>                 
>                 nonzero_pos = mm_which(measurevect01:>0)
>                 
>                 measuremat1 = (measurevect01[nonzero_pos],measurevect02[nonzero_pos])/*at this point, marketshare and firm information a
> re linked
>                                                                                                                                         
>                         such that we can identify the list of unique links using 
>                                                                                                                                         
>                         the firm information*/
>                 
>                 measuremat2 = uniqrows(measuremat1)             
>         
>         
>                 measure1 = rows(measuremat1)
>                 measure2 = rows(measuremat2)
>                 measure3 = colsum(measuremat1[.,2])
>                 measure4 = colsum(measuremat2[.,2])
>                 
>         measurevect1[firm_position]=mm_repeat(measure1,nr_firm_entry)
>         measurevect2[firm_position]=mm_repeat(measure2,nr_firm_entry)
>         measurevect3[firm_position]=mm_repeat(measure3,nr_firm_entry)
>         measurevect4[firm_position]=mm_repeat(measure4,nr_firm_entry)   
>                 
>                 }
>         } /* end if */
> 
>                 measure1_allfirm = (measure1_allfirm \ measurevect1)
>                 measure2_allfirm = (measure2_allfirm \ measurevect2)
>                 measure3_allfirm = (measure3_allfirm \ measurevect3)
>                 measure4_allfirm = (measure4_allfirm \ measurevect4)
>         }
                 <istmt>:  3499  mm_which() not found
r(3499);

: 
: 
:         n = length(measure1_allfirm)

:         st_store(., ("measure1"), measure1_allfirm[2::n])
                 <istmt>:  3301  subscript invalid
r(3301);

:         st_store(., ("measure2"), measure2_allfirm[2::n])
                 <istmt>:  3301  subscript invalid
r(3301);

:         st_store(., ("measure3"), measure3_allfirm[2::n])
                 <istmt>:  3301  subscript invalid
r(3301);

:         st_store(., ("measure4"), measure4_allfirm[2::n])
                 <istmt>:  3301  subscript invalid
r(3301);

: 
: 
: end
------------------------------------------------------------------------------------------------------------------------------------------

. rename measure1 links1_same

. label var links1_same "Direct links with double counting-RJV, Firm-SIC = RJV-SIC"

. rename measure2 links2_same

. label var links2_same "Direct links without double counting-RJV, Firm-SIC = RJV-SIC"

. rename measure3 links_MS1_same

. label var links_MS1_same "Market share of direct links with double counting-RJV, Firm-SIC = RJV-SIC"

. rename measure4 links_MS2_same

. label var links_MS2_same "Market share of direct links without double counting-RJV, Firm-SIC = RJV-SIC"

. 
. *********************************************************************************************************************
. *********************************************************************************************************************
. ****
. ****                     Here we count the links with firms from the same industry
. ****                     which are in a RJV whose SIC4 code is not the same as that of the firm
. ****
. *********************************************************************************************************************
. *********************************************************************************************************************
. drop ms*

. 
. so year SIC comnum rjvnum

. 
. sum S

    Variable |       Obs        Mean    Std. Dev.       Min        Max
-------------+--------------------------------------------------------
           S |     60574    86.86961    82.85097          1        394

. local max_obs = r(max)

. local i=1

. forvalues i=1/`max_obs' {
  2. qui by year SIC: gen x`i'=rjvnum if seq==`i'
  3. qui egen rjvnum`i'=max(x`i'), by(year SIC)
  4. qui by year SIC: gen f`i'=comnum if rjvnum`i'==rjvnum & rjvnum`i'!=x`i' & SIC!=sic4
  5. drop x`i'
  6. drop rjvnum`i'
  7. }

. 
. ******************************************************************************
.         **** here we put the f Matrix into mata
. 
.                         mata
------------------------------------------------- mata (type end to exit) ----------------------------------------------------------------
: 
:                         rjv = st_data(. ,("rjvnum"))

:                         comnum = st_data(. ,("comnum"))

:                         seq_SIC= st_data(. , ("seq_SIC")) 

:                         S= st_data(. , ("S")) 

:                         n_var =  st_nvar()

: 
:                         s = n_var - max(S)+1 /*position of the first Variable of the Matrix*/

:                         index=(s::n_var)'

:                         st_view(f=.,.,index)

:                         f=f

: 
:                         end
------------------------------------------------------------------------------------------------------------------------------------------

. ******************************************************************************
. drop f*

. 
. * we generate empty vectors for the link measures, which will be generated in mata      
. gen measure1=.
(60574 missing values generated)

. gen measure2=.
(60574 missing values generated)

. gen measure3=.
(60574 missing values generated)

. gen measure4=. 
(60574 missing values generated)

. 
. sum S

    Variable |       Obs        Mean    Std. Dev.       Min        Max
-------------+--------------------------------------------------------
           S |     60574    86.86961    82.85097          1        394

. local max_obs = r(max)

. local i=1

. forvalues i=1/`max_obs' {
  2. qui by year SIC: gen x`i'=rjvnum if seq==`i'
  3. qui egen rjvnum`i'=max(x`i'), by(year SIC)
  4. qui by year SIC: gen ms`i'=MS if rjvnum`i'==rjvnum & rjvnum`i'!=x`i' & SIC!=sic4
  5. drop x`i'
  6. drop rjvnum`i'
  7. }

. 
. 
. **************************************************************************************************              
. **** here we put the ms Matrix into mata
. 
.         mata
------------------------------------------------- mata (type end to exit) ----------------------------------------------------------------
: 
:                         n_var =  st_nvar()

: 
:                         s = n_var - max(S)+1 /*position of the first Variable of the Matrix*/

:                         index=(s::n_var)'

:                         st_view(ms=.,.,index)

:                         ms = ms

:                 
: 
:         measure1_allfirm = 0

:         measure2_allfirm = 0

:         measure3_allfirm = 0

:         measure4_allfirm = 0

:         
:         max_SIC = max(seq_SIC)

: 
: 
:         for(i=1;i<=max_SIC;i++) {
> 
>         rowindex = mm_which(seq_SIC:==i)
>         comnum_rel = comnum[rowindex]
>         nr_rows = length(comnum_rel)
>         uniquefirms = uniqrows(comnum_rel)
>         nr_unique = length(uniquefirms)
>                 measurevect1 = comnum_rel*0
>                 measurevect2 = comnum_rel*0
>                 measurevect3 = comnum_rel*0
>                 measurevect4 = comnum_rel*0
> 
>         if (nr_unique==1) {
>         nr_entry1 = length(comnum_rel)
>         measurevect1=mm_repeat(0,nr_entry1)
>         measurevect2=mm_repeat(0,nr_entry1)
>         measurevect3=mm_repeat(0,nr_entry1)
>         measurevect4=mm_repeat(0,nr_entry1)
> 
>         }
> 
>         if (nr_unique>1) {
> 
> 
>         for (j=1;j<=nr_unique;j++) {
>                 firm_rel = uniquefirms[j]
>                 firm_position = mm_which(comnum_rel:==firm_rel)
>                 nr_firm_entry = length(firm_position)
> 
> 
>                 measure1 = f[rowindex,firm_position]
>                 measure2 = ms[rowindex,firm_position]
>                 
>                 /*
>                 check = rowmax(measure1)
>                 measurevect01 = rowsum(measure1)
>                 check2 = check-measurevect01
>                 nonzero = mm_which(check2:<0)
>                 alert = length(nonzero)
>                 */
>                 
>                 measurevect001 = rowsum(rowmax(measure1))
>                 measurevect002 = rowsum(rowmax(measure2))
>                 
>                 notself = mm_which(measurevect001:!=firm_rel)                                           /*theoretically this step is not
>  necessary,
>                                                                                                                                         
>                         it makes sure that links with itself are 
>                                                                                                                                         
>                         not counted. */
>                 
>                 
>                 measurevect01 = measurevect001[notself]
>                 measurevect02 = measurevect002[notself]
>                 
>                 nonzero_pos = mm_which(measurevect01:>0)
>                 
>                 measuremat1 = (measurevect01[nonzero_pos],measurevect02[nonzero_pos]) /*at this point, marketshare and firm information 
> are linked
>                                                                                                                                         
>                         such that we can identify the list of unique links using 
>                                                                                                                                         
>                         the firm information*/
>                 
>                 measuremat2 = uniqrows(measuremat1)             
>         
>         
>                 measure1 = rows(measuremat1)
>                 measure2 = rows(measuremat2)
>                 measure3 = colsum(measuremat1[.,2])
>                 measure4 = colsum(measuremat2[.,2])
>                 
>         measurevect1[firm_position]=mm_repeat(measure1,nr_firm_entry)
>         measurevect2[firm_position]=mm_repeat(measure2,nr_firm_entry)
>         measurevect3[firm_position]=mm_repeat(measure3,nr_firm_entry)
>         measurevect4[firm_position]=mm_repeat(measure4,nr_firm_entry)   
>                 
>                 }
>         } /* end if */
> 
>                 measure1_allfirm = (measure1_allfirm \ measurevect1)
>                 measure2_allfirm = (measure2_allfirm \ measurevect2)
>                 measure3_allfirm = (measure3_allfirm \ measurevect3)
>                 measure4_allfirm = (measure4_allfirm \ measurevect4)
>         }
                 <istmt>:  3499  mm_which() not found
r(3499);

: 
: 
:         n = length(measure1_allfirm)

:         st_store(., ("measure1"), measure1_allfirm[2::n])
                 <istmt>:  3301  subscript invalid
r(3301);

:         st_store(., ("measure2"), measure2_allfirm[2::n])
                 <istmt>:  3301  subscript invalid
r(3301);

:         st_store(., ("measure3"), measure3_allfirm[2::n])
                 <istmt>:  3301  subscript invalid
r(3301);

:         st_store(., ("measure4"), measure4_allfirm[2::n])
                 <istmt>:  3301  subscript invalid
r(3301);

: 
: end
------------------------------------------------------------------------------------------------------------------------------------------

. **************************************************************************************
. 
. rename measure1 links1_diff

. label var links1_diff "Direct links with double counting-RJV, Firm-SIC is not RJV-SIC"

. rename measure2 links2_diff

. label var links2_diff "Direct links without double counting-RJV in same SIC, Firm-SIC is not RJV-SIC"

. rename measure3 links_MS1_diff

. label var links_MS1_diff "Market share of direct links with double counting-RJV, Firm-SIC is not RJV-SIC"

. rename measure4 links_MS2_diff

. label var links_MS2_diff "Market share of direct links without double counting-RJV, Firm-SIC is not RJV-SIC"
note: label truncated to 80 characters

. 
. drop ms*

. 
. 
. save "links_big.dta", replace
file links_big.dta saved

. 
. egen s = seq(), by(year comnum SIC)

. 
. keep if s == 1
(49795 observations deleted)

. keep  ticker year SIC links*

. so ticker year

. 
. desc

Contains data from links_big.dta
  obs:        10,779                          
 vars:            17                          19 Dec 2014 18:59
 size:       754,530                          
------------------------------------------------------------------------------------------------------------------------------------------
              storage   display    value
variable name   type    format     label      variable label
------------------------------------------------------------------------------------------------------------------------------------------
ticker          str8    %9s                   TICKER
year            float   %9.0g                 
SIC             int     %10.0g                SIC Code (Primary)
links1_tot      float   %9.0g                 Total direct links with double counting-RJV
links2_tot      float   %9.0g                 Total direct links without double counting-RJV
links1          float   %9.0g                 Direct links with double counting
links2          float   %9.0g                 Direct links without double counting
links_MS1_tot   float   %9.0g                 Market share of total direct links with double counting-RJV
links_MS2_tot   float   %9.0g                 Market share of total direct links without double counting-RJV
links1_same     float   %9.0g                 Direct links with double counting-RJV, Firm-SIC = RJV-SIC
links2_same     float   %9.0g                 Direct links without double counting-RJV, Firm-SIC = RJV-SIC
links_MS1_same  float   %9.0g                 Market share of direct links with double counting-RJV, Firm-SIC = RJV-SIC
links_MS2_same  float   %9.0g                 Market share of direct links without double counting-RJV, Firm-SIC = RJV-SIC
links1_diff     float   %9.0g                 Direct links with double counting-RJV, Firm-SIC is not RJV-SIC
links2_diff     float   %9.0g                 Direct links without double counting-RJV in same SIC, Firm-SIC is not RJV-SIC
links_MS1_diff  float   %9.0g                 Market share of direct links with double counting-RJV, Firm-SIC is not RJV-SIC
links_MS2_diff  float   %9.0g                 Market share of direct links without double counting-RJV, Firm-SIC is not RJV-SI
------------------------------------------------------------------------------------------------------------------------------------------
Sorted by:  ticker  year
     Note:  dataset has changed since last saved

. 
. save links.dta, replace
file links.dta saved

. clear mata

. log close
      name:  <unnamed>
       log:  E:\REStat_MS14767_Vol96(2)\Data preparation Compustat segment\8_links.log
  log type:  text
 closed on:  19 Dec 2014, 18:59:56
------------------------------------------------------------------------------------------------------------------------------------------
